import re
import urllib.request
import urllib.error
# Landing page whose featured posts are crawled.
url = "http://blog.csdn.net/"
# Browser-like User-Agent sent with every request made through `opener`.
headers = ("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36")
opener = urllib.request.build_opener()
opener.addheaders = [headers]
# Captures the href of each post link that follows a tracked <h2> heading.
pat = '<h2 class="csdn-tracking-statistics".*?<a strategy=".*?" href="(.*?)" target="_blank">'
# Captures the text of a page's <title> element.
pat2 = "<title>(.*?)</title>"
# Prefix (directory plus trailing separator) prepended to every saved file name.
save_dir = "E:\\python\\作业\\"


def extract_links(html):
    """Return the post URLs found in the landing page markup *html*.

    These are the blog posts listed in the centre column of the page.
    """
    return re.compile(pat, re.S).findall(html)


def extract_title(html):
    """Return the stripped <title> text of *html*, or None if there is none."""
    # re.S lets a title that spans several lines still match.
    found = re.search(pat2, html, re.S)
    return found.group(1).strip() if found else None


def safe_filename(title, fallback):
    """Turn *title* into a usable file name, or return *fallback*.

    Characters that Windows forbids in file names and control characters
    (including newlines) are replaced by underscores; surrounding spaces and
    dots are dropped. *fallback* is returned when nothing usable remains.
    """
    if not title:
        return fallback
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", title).strip(" .")
    return cleaned or fallback


def fetch(target):
    """Download *target* through `opener` and return the raw response bytes."""
    with opener.open(target) as response:
        return response.read()


def report_error(err):
    """Print a diagnostic for a failed download, as the script always did."""
    if getattr(err, "code", None) == 444:
        print("存在被管理员删除的博客")
    if hasattr(err, "reason"):
        print(err.reason)


def main():
    """Save every post linked from the landing page as `<title>.html`.

    A post whose download fails is reported and skipped; the crawl continues
    with the next link.
    """
    index_html = fetch(url).decode("utf-8")
    for index, link in enumerate(extract_links(index_html)):
        try:
            raw = fetch(link)
        except urllib.error.URLError as err:
            report_error(err)
            continue
        # Decode leniently: the text is only needed to find the title.
        page = raw.decode("utf-8", "ignore")
        name = safe_filename(extract_title(page), "untitled_{}".format(index))
        # Write the bytes already downloaded instead of fetching the page again.
        with open(save_dir + name + ".html", "wb") as out:
            out.write(raw)


if __name__ == "__main__":
    main()

